Exploratory analysis: gender and cognitive competence

Attaching packages

Show the code
library(readr)
library(tidyverse)
library(broom)
library(corrplot)
library(MASS)
library(nlme)
library(ggeffects)
library(RColorBrewer)
# Loads the ISCAM helper functions (e.g. iscamsummary(), used later)
load(url("http://www.rossmanchance.com/iscam3/ISCAM.RData"))

# MASS::select masks dplyr::select because MASS is attached after tidyverse;
# restore the dplyr version for the pipelines below
select <- dplyr::select

Loading in data

Show the code
# Read the public survey data.
allYrsFinal <- read_csv("FinalData/All Years Final Public - with Vars.csv")

# Version that keeps everyone but recodes missing gender as an explicit
# "NA/Other" level, restricted to non-zero pre/post competence scores.
allYrsFinal_NA <- allYrsFinal |>
  filter(cognitive.competence.pre != 0,
         cognitive.competence.post != 0) |>
  mutate(gender = if_else(is.na(gender), "NA/Other", gender))

# Main analysis data: same non-zero-score restriction, gender left as-is.
allYrsFinal <- allYrsFinal |>
  filter(cognitive.competence.pre != 0,
         cognitive.competence.post != 0)

How Many NAs?

Show the code
# Respondent counts by gender; NA = gender not reported
allYrsFinal |>
  group_by(gender)|>
  count()
# A tibble: 3 × 2
# Groups:   gender [3]
  gender     n
  <chr>  <int>
1 Female  7478
2 Male    4189
3 <NA>      63

Correlation matrices by gender

For pre questions, for women

Show the code
# The six cognitive-competence survey items, pre and post versions
competenceFull <- c("q6e.pre.a", "q6e.post.a", "q7a.pre.a", "q7a.post.a", "q8f.pre.a", "q8f.post.a", "q9a.pre.a", "q9a.post.a", "q9b.pre.a", "q9b.post.a", "q9e.pre.a", "q9e.post.a")

# Complete-case pre items for female respondents
pre_only_F <- allYrsFinal|>
  filter(gender == "Female")|>
  select(all_of(competenceFull))|>
  select(ends_with("pre.a")) |>
  drop_na()
# Pairwise Pearson correlations among the six pre items
cor(pre_only_F)
          q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.4800976 0.4308480 0.3894616 0.4401435 0.6347125
q7a.pre.a 0.4800976 1.0000000 0.3946282 0.4222106 0.4515881 0.5119590
q8f.pre.a 0.4308480 0.3946282 1.0000000 0.2979830 0.3694416 0.5146106
q9a.pre.a 0.3894616 0.4222106 0.2979830 1.0000000 0.6909989 0.4012884
q9b.pre.a 0.4401435 0.4515881 0.3694416 0.6909989 1.0000000 0.4718131
q9e.pre.a 0.6347125 0.5119590 0.5146106 0.4012884 0.4718131 1.0000000
Show the code
# Heat-map view of the correlation matrix above
corrplot(cor(pre_only_F))

For pre questions, for men

Show the code
# Complete-case pre items for male respondents
pre_only_M <- allYrsFinal|>
  filter(gender == "Male")|>
  select(all_of(competenceFull))|>
  select(ends_with("pre.a")) |>
  drop_na()
cor(pre_only_M)
          q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.4725968 0.4301648 0.3839258 0.4374356 0.6181036
q7a.pre.a 0.4725968 1.0000000 0.3572141 0.4003075 0.4225857 0.4806798
q8f.pre.a 0.4301648 0.3572141 1.0000000 0.2886259 0.3566590 0.5043949
q9a.pre.a 0.3839258 0.4003075 0.2886259 1.0000000 0.6812080 0.4039653
q9b.pre.a 0.4374356 0.4225857 0.3566590 0.6812080 1.0000000 0.4772119
q9e.pre.a 0.6181036 0.4806798 0.5043949 0.4039653 0.4772119 1.0000000
Show the code
# Heat-map view of the correlation matrix above
corrplot(cor(pre_only_M))

For pre questions, for NAs

Show the code
# Complete-case pre items for respondents with missing gender
pre_only_NA <- allYrsFinal|>
  filter(is.na(gender))|>
  select(all_of(competenceFull))|>
  select(ends_with("pre.a")) |>
  drop_na()
cor(pre_only_NA)
          q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
q6e.pre.a 1.0000000 0.5852185 0.3654192 0.3483319 0.5205958 0.6564209
q7a.pre.a 0.5852185 1.0000000 0.2212318 0.4279232 0.4932185 0.6025653
q8f.pre.a 0.3654192 0.2212318 1.0000000 0.2081000 0.3183099 0.4659341
q9a.pre.a 0.3483319 0.4279232 0.2081000 1.0000000 0.6169784 0.5134717
q9b.pre.a 0.5205958 0.4932185 0.3183099 0.6169784 1.0000000 0.6528418
q9e.pre.a 0.6564209 0.6025653 0.4659341 0.5134717 0.6528418 1.0000000
Show the code
# Heat-map view of the correlation matrix above
corrplot(cor(pre_only_NA))

For post questions, for women

Show the code
# Complete-case post items for female respondents
post_only_F <- allYrsFinal|>
  filter(gender == "Female")|>
  select(all_of(competenceFull))|>
  select(ends_with("post.a")) |>
  drop_na()
cor(post_only_F)
           q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
q6e.post.a  1.0000000  0.5630740  0.4195056  0.4363502  0.5021618  0.6828753
q7a.post.a  0.5630740  1.0000000  0.3963168  0.5889909  0.6303725  0.6265500
q8f.post.a  0.4195056  0.3963168  1.0000000  0.3038777  0.3883250  0.4635532
q9a.post.a  0.4363502  0.5889909  0.3038777  1.0000000  0.7556062  0.4806988
q9b.post.a  0.5021618  0.6303725  0.3883250  0.7556062  1.0000000  0.5709947
q9e.post.a  0.6828753  0.6265500  0.4635532  0.4806988  0.5709947  1.0000000
Show the code
# Heat-map view of the correlation matrix above
corrplot(cor(post_only_F))

For post questions, for men

Show the code
# Complete-case post items for male respondents
post_only_M <- allYrsFinal|>
  filter(gender == "Male")|>
  select(all_of(competenceFull))|>
  select(ends_with("post.a")) |>
  drop_na()
cor(post_only_M)
           q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
q6e.post.a  1.0000000  0.5662949  0.4294258  0.4446943  0.4943005  0.6623038
q7a.post.a  0.5662949  1.0000000  0.3815573  0.5245169  0.5728703  0.6034080
q8f.post.a  0.4294258  0.3815573  1.0000000  0.2676293  0.3421776  0.4589553
q9a.post.a  0.4446943  0.5245169  0.2676293  1.0000000  0.7258889  0.4482372
q9b.post.a  0.4943005  0.5728703  0.3421776  0.7258889  1.0000000  0.5410897
q9e.post.a  0.6623038  0.6034080  0.4589553  0.4482372  0.5410897  1.0000000
Show the code
# Heat-map view of the correlation matrix above
corrplot(cor(post_only_M))

For post questions, for NAs

Show the code
# BUG FIX: this chunk previously repeated the Male subset code (and its
# printed output) under the "For post questions, for NAs" heading. It now
# subsets respondents with missing gender, matching the heading; the
# correlation matrix and plot must be regenerated on the next render.
post_only_NA <- allYrsFinal|>
  filter(is.na(gender))|>
  select(all_of(competenceFull))|>
  select(ends_with("post.a")) |>
  drop_na()
cor(post_only_NA)
Show the code
corrplot(cor(post_only_NA))

Calc Prerequisites

Show the code
# Calculus-prerequisite subset with missing gender recoded to "NA/Other",
# plus a pre-to-post change score for mastering confidence.
calc_req_NA <- allYrsFinal |>
  filter(math.prereq == "Calculus",
         cognitive.competence.pre != 0,
         cognitive.competence.post != 0) |>
  mutate(gender = if_else(is.na(gender), "NA/Other", gender),
         mastering.confidence.change =
           mastering.confidence.post - mastering.confidence.pre)

# Same subset, but dropping respondents with missing gender entirely.
calc_req <- allYrsFinal |>
  filter(math.prereq == "Calculus",
         !is.na(gender),
         cognitive.competence.pre != 0,
         cognitive.competence.post != 0) |>
  mutate(mastering.confidence.change =
           mastering.confidence.post - mastering.confidence.pre)

# Mixed model: fixed effects for student gender, instructor gender, and
# binned intro-stats teaching experience; random intercept and random
# gender slope for each instructor.
calc.fit <- lme(
  cognitive.competence.change ~ gender + instructor.gender +
    years.teaching.intro.stats.binned,
  random = ~ 1 + gender | instructor,
  data = calc_req
)

# Residuals-vs-fitted diagnostic plot.
plot(resid(calc.fit) ~ fitted(calc.fit), xlab = "Fitted", ylab = "Residuals")
abline(h = 0, lty = "dashed")

Show the code
# Normal Q-Q plot of residuals; points near the line suggest approximate normality
qqnorm(resid(calc.fit), ylab="Residuals"); qqline(resid(calc.fit), lty = "dashed")

Show the code
# Histogram of residuals (symmetry / outlier check)
hist(resid(calc.fit), main = '', col="olivedrab", xlab = "Residuals")

Show the code
# Fixed-effect estimates, random-effect SDs, and fit statistics
summary(calc.fit) 
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  1075.435 1111.755 -528.7176

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev    Corr  
(Intercept) 0.3773246 (Intr)
genderMale  0.1897776 -0.2  
Residual    0.8244677       

Fixed effects:  cognitive.competence.change ~ gender + instructor.gender + years.teaching.intro.stats.binned 
                                             Value Std.Error  DF    t-value
(Intercept)                             0.18021904 0.2722960 413  0.6618498
genderMale                             -0.06048295 0.1399364 413 -0.4322175
instructor.genderMale                  -0.31608540 0.3156329   5 -1.0014337
years.teaching.intro.stats.binned10-20 -0.15129835 0.3107182   5 -0.4869312
years.teaching.intro.stats.binned5-10   0.03485842 0.1039343 413  0.3353888
                                       p-value
(Intercept)                             0.5084
genderMale                              0.6658
instructor.genderMale                   0.3626
years.teaching.intro.stats.binned10-20  0.6469
years.teaching.intro.stats.binned5-10   0.7375
 Correlation: 
                                       (Intr) gndrMl inst.M y....1
genderMale                             -0.154                     
instructor.genderMale                  -0.686 -0.109              
years.teaching.intro.stats.binned10-20 -0.352  0.000 -0.091       
years.teaching.intro.stats.binned5-10  -0.048  0.009 -0.129  0.132

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-3.268132315 -0.597894311  0.008557559  0.628149508  2.998537004 

Number of Observations: 423
Number of Groups: 8 
Show the code
# Per-instructor random-effect deviations: intercept and genderMale slope
ranef(calc.fit) # coeffs for each instructor section
                (Intercept)  genderMale
Instructor177 -0.0562811602  0.08499587
Instructor182  0.0375606385  0.05362522
Instructor204  0.0007681933 -0.09347672
Instructor234  0.4673877482  0.06024862
Instructor40   0.2439284568 -0.03238776
Instructor73  -0.2027556280  0.02039623
Instructor95  -0.0378557422 -0.15187570
Instructor99  -0.4527525063  0.05847425

model without instructor gender

With instructor gender:

  • residual StdDev = 0.8244677
  • genderMale StdDev = 0.1897776

Without instructor gender:

  • residual StdDev = 0.8242596
  • genderMale StdDev = 0.1846574

With gender*instructor gender:

  • residual StdDev = 0.8226201
  • genderMale StdDev = 0.2226835

The models with and without instructor gender seem to explain a similar amount of variability; the gender*instructor gender model explains a bit more variability.

Show the code
# Same outcome modeled on calc_req_NA, which keeps the "NA/Other" gender
# level; instructor.gender is not included in this model
calc.fit2 <- lme(cognitive.competence.change ~ gender +  years.teaching.intro.stats.binned, 
                random = ~ 1+gender|instructor, data= calc_req_NA)

# Residuals-vs-fitted diagnostic for calc.fit2
plot(resid(calc.fit2) ~ fitted(calc.fit2), xlab = "Fitted", ylab="Residuals")
abline(h=0, lty = "dashed")

Show the code
# Normal Q-Q plot of residuals for calc.fit2.
# BUG FIX: the reference line previously used resid(calc.fit), so the line
# and the plotted points came from different models.
qqnorm(resid(calc.fit2), ylab="Residuals"); qqline(resid(calc.fit2), lty = "dashed")

Show the code
# Histogram of calc.fit2 residuals (symmetry / outlier check)
hist(resid(calc.fit2), main = '', col="olivedrab", xlab = "Residuals")

Show the code
# Summary for the NA/Other-inclusive model
summary(calc.fit2)
Linear mixed-effects model fit by REML
  Data: calc_req_NA 
       AIC      BIC    logLik
  1081.911 1130.365 -528.9553

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
               StdDev    Corr         
(Intercept)    0.4071961 (Intr) gndrMl
genderMale     0.1846553 -0.399       
genderNA/Other 6.2170483 -0.001  0.000
Residual       0.8242597              

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned 
                                             Value Std.Error  DF    t-value
(Intercept)                            -0.00573151  0.207789 413 -0.0275833
genderMale                             -0.08778980  0.136839 413 -0.6415544
genderNA/Other                         -0.02810501  6.271800 413 -0.0044812
years.teaching.intro.stats.binned10-20 -0.16252515  0.314972   6 -0.5159983
years.teaching.intro.stats.binned5-10   0.02051101  0.103159 413  0.1988285
                                       p-value
(Intercept)                             0.9780
genderMale                              0.5215
genderNA/Other                          0.9964
years.teaching.intro.stats.binned10-20  0.6243
years.teaching.intro.stats.binned5-10   0.8425
 Correlation: 
                                       (Intr) gndrMl gnNA/O y....1
genderMale                             -0.389                     
genderNA/Other                          0.000  0.002              
years.teaching.intro.stats.binned10-20 -0.563  0.009  0.000       
years.teaching.intro.stats.binned5-10  -0.179 -0.015 -0.005  0.122

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-3.265842870 -0.612337175 -0.002866103  0.628048731  3.002408483 

Number of Observations: 424
Number of Groups: 8 

Turning some vars into factors for ggpredict

Show the code
# Convert the modeling variables to factors so ggpredict treats them as
# categorical when building its prediction grid.
calc_factor <- calc_req |>
  mutate(across(c(gender, instructor.gender,
                  years.teaching.intro.stats.binned, instructor),
                factor))

calc.fit.2 random slopes graph

The lines look pretty parallel = the effect of gender on cognitive competence change is consistent across instructors

Show the code
# Refit the mixed model on the factor version of the data
calc.fit <- lme(cognitive.competence.change ~ gender+ instructor.gender +  years.teaching.intro.stats.binned, 
                random = ~ 1+gender|instructor, data= calc_factor)

# Predictions by gender and experience bin for a sample of instructors,
# including random effects (type = "re")
calc_plot <- ggpredict(calc.fit, 
          terms = c("gender", "years.teaching.intro.stats.binned", "instructor [sample=8]"),
          type = "re")

# One line per experience bin; near-parallel lines suggest the gender
# effect is consistent across instructors
ggplot(calc_plot, aes(x=x, y=predicted, group = group, color = group))+
  geom_line() +
  labs(y= "Cognitive Competence Change", x="Female = 0, Male = 1", color = "Years Teaching Intro Stats")

No significant interactions?

Show the code
# Test a student-gender x instructor-gender interaction; random intercept
# only (no random gender slope) for each instructor
calc.fit_interaction <- lme(cognitive.competence.change ~ gender * instructor.gender +  years.teaching.intro.stats.binned, 
                random = ~ 1|instructor, data= calc_req)

summary(calc.fit_interaction)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  1073.433 1105.698 -528.7166

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.4225361 0.8259412

Fixed effects:  cognitive.competence.change ~ gender * instructor.gender + years.teaching.intro.stats.binned 
                                            Value Std.Error  DF    t-value
(Intercept)                             0.3139906 0.3144654 412  0.9984900
genderMale                             -0.5078141 0.3563894 412 -1.4248856
instructor.genderMale                  -0.3952205 0.3677724   5 -1.0746333
years.teaching.intro.stats.binned10-20 -0.1393314 0.3447903   5 -0.4041046
years.teaching.intro.stats.binned5-10   0.0320615 0.1044988 412  0.3068122
genderMale:instructor.genderMale        0.3696232 0.3675957 412  1.0055158
                                       p-value
(Intercept)                             0.3186
genderMale                              0.1549
instructor.genderMale                   0.3316
years.teaching.intro.stats.binned10-20  0.7028
years.teaching.intro.stats.binned5-10   0.7591
genderMale:instructor.genderMale        0.3152
 Correlation: 
                                       (Intr) gndrMl inst.M y....1 y....5
genderMale                             -0.336                            
instructor.genderMale                  -0.733  0.304                     
years.teaching.intro.stats.binned10-20 -0.324 -0.045 -0.100              
years.teaching.intro.stats.binned5-10  -0.038 -0.005 -0.116  0.119       
genderMale:instructor.genderMale        0.324 -0.970 -0.330  0.047  0.014

Standardized Within-Group Residuals:
          Min            Q1           Med            Q3           Max 
-3.2640557460 -0.5758305777 -0.0009392462  0.6432488302  3.0259104671 

Number of Observations: 423
Number of Groups: 8 

Checking of NAs and 0s in cognitive competence


  0-5 10-20  5-10 
  145    58   220 
 [1] "Instructor40_Section1_Fall_16-17"    "Instructor73_Section1_Spring_16-17" 
 [3] "Instructor95_Section1_Fall_16-17"    "Instructor99_Section1_Spring_16-17" 
 [5] "Instructor95_Section1_Fall_15-16"    "Instructor177_Section1_Fall_15-16"  
 [7] "Instructor177_Section2_Fall_15-16"   "Instructor182_Section1_Fall_15-16"  
 [9] "Instructor204_Section1_Spring_14-15" "Instructor95_Section1_Fall_14-15"   
[11] "Instructor234_Section1_Spring_14-15"
Show the code
# Pre- and post-item name vectors for the cognitive-competence scale
competencePre <- c("q6e.pre.a",  "q7a.pre.a", "q8f.pre.a", "q9a.pre.a", "q9b.pre.a",  "q9e.pre.a")
competencePost <- c("q6e.post.a",  "q7a.post.a", "q8f.post.a", "q9a.post.a", "q9b.post.a",  "q9e.post.a")

calc_req|> # checking for NAs and 0s
  select(all_of(competencePre), cognitive.competence.pre)
# A tibble: 423 × 7
   q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
 1         4         5         4         5         5         5
 2         3         7         4         7         7         5
 3         6         5         5         6         6        NA
 4         6         6         5         6         6         6
 5         5         6         4         6         6         5
 6         5         5         5         6         6         5
 7         4         3         5         6         5         5
 8         5         6         4         6         6         4
 9         6         5         3         6         6         2
10         5         6         4         5         5         5
# ℹ 413 more rows
# ℹ 1 more variable: cognitive.competence.pre <dbl>
Show the code
# How many rows are missing q7a.pre.a?
table(is.na(calc_req$q7a.pre.a))

FALSE  TRUE 
  422     1 
Show the code
# Any zero pre scale scores left after the upstream filtering? (expect none)
table(calc_req$cognitive.competence.pre == 0) # get rid of (then check for post - make sure that they only get a 0 on cognitive.competence.pre/post if they didn't fill out any)

FALSE 
  423 
Show the code
# The respondent missing q7a.pre.a still has a nonzero scale score --
# presumably computed from the remaining items (confirm the scoring rule)
calc_req|>
  filter(is.na(q7a.pre.a), cognitive.competence.pre != 0)|>
  select(all_of(competencePre), cognitive.competence.pre)
# A tibble: 1 × 7
  q6e.pre.a q7a.pre.a q8f.pre.a q9a.pre.a q9b.pre.a q9e.pre.a
      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
1        NA        NA         4         5         5         4
# ℹ 1 more variable: cognitive.competence.pre <dbl>
Show the code
# Sanity check: no rows with a zero pre scale score remain
calc_req|>
  filter(cognitive.competence.pre == 0) # none
# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
#   opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
#   test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
#   q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
#   q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
#   q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
#   q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
Show the code
# Sanity check: no rows with a zero post scale score remain
calc_req|>
  filter(cognitive.competence.post == 0) # none
# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
#   opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
#   test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
#   q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
#   q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
#   q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
#   q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
Show the code
# How many rows are missing q7a.post.a?
table(is.na(calc_req$q7a.post.a))

FALSE  TRUE 
  421     2 
Show the code
# Any zero post scale scores left? (expect none)
table(calc_req$cognitive.competence.post == 0) # get rid of (then check for post - make sure that they only get a 0 on cognitive.competence.pre/post if they didn't fill out any)

FALSE 
  423 
Show the code
# we can remove cognitive.competencepre/post from dataset

# The two respondents missing q7a.post.a, with their remaining post items
calc_req|>
  filter(is.na(q7a.post.a), cognitive.competence.post != 0)|> 
  select(all_of(competencePost), cognitive.competence.post)
# A tibble: 2 × 7
  q6e.post.a q7a.post.a q8f.post.a q9a.post.a q9b.post.a q9e.post.a
       <dbl>      <dbl>      <dbl>      <dbl>      <dbl>      <dbl>
1          4         NA          4          3          2          5
2          5         NA          5          6          6          6
# ℹ 1 more variable: cognitive.competence.post <dbl>
Show the code
# Number of distinct instructor-section combinations
length(unique(calc_req$instructor.section))
[1] 11
Show the code
# Model residuals grouped by Carnegie classification.
# BUG FIX: xlab previously said "instructor section", but the grouping
# variable on the right-hand side is carnegie.classification.
boxplot(resid(calc.fit) ~ calc_req$carnegie.classification,
        xlab = "Carnegie Classification", ylab = "Residuals")

Show the code
# Residual count should equal the number of modeled rows
length(resid(calc.fit))
[1] 423
Show the code
# Length of the section vector, for comparison with the residual count
length(calc_req$instructor.section)
[1] 423
Show the code
# Row count of the calculus-prerequisite subset
nrow(calc_req)
[1] 423
Show the code
# ISCAM numerical summary of the model residuals
iscamsummary(resid(calc.fit))
missing       n    Min       Q1  Median      Q3     Max    Mean      SD 
      0     423  -2.694  -0.493   0.007   0.518   2.472       0   0.815 
Show the code
# ISCAM numerical summary of the raw outcome variable
iscamsummary(calc_req$cognitive.competence.change)
missing       n    Min       Q1  Median      Q3     Max    Mean      SD 
      0     423      -3  -0.667  -0.167   0.333   2.333  -0.184   0.851 
Show the code
# Student gender counts in the calculus subset
table(calc_req$gender)

Female   Male 
   258    165 
Show the code
# Rows taught by female vs male instructors
table(calc_req$instructor.gender)

Female   Male 
    51    372 
Show the code
# Counts by binned overall teaching experience (table() drops NAs by default)
table(calc_req$years.teaching.experience.binned)

  0-5 10-20  5-10 
   31    65   208 
Show the code
# View() opens the interactive data viewer; it has no effect when the
# document is rendered non-interactively
View(calc_req$years.teaching.experience.binned)
Show the code
# Post vs pre cognitive-competence scale scores
calc_req|>
  ggplot(aes(x=cognitive.competence.pre, y = cognitive.competence.post)) +
  geom_point()

Show the code
# Re-confirm: no zero pre scale scores remain
calc_req|>
  filter(cognitive.competence.pre == 0)
# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
#   opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
#   test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
#   q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
#   q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
#   q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
#   q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …
Show the code
# Inspect Instructor204 rows that are missing the experience bin:
# institution, textbook, and classification context for follow-up
calc_req|>
  filter(is.na(years.teaching.experience.binned), 
         instructor == "Instructor204")|>
  select(institution, textbook.used, textbook.classification, carnegie.classification, instructor.section, years.teaching.intro.stats)
# A tibble: 10 × 6
   institution textbook.used textbook.classification carnegie.classification
         <dbl> <chr>         <chr>                   <chr>                  
 1         108 ISCAM         ISI                     Baccalaureate College  
 2         108 ISCAM         ISI                     Baccalaureate College  
 3         108 ISCAM         ISI                     Baccalaureate College  
 4         108 ISCAM         ISI                     Baccalaureate College  
 5         108 ISCAM         ISI                     Baccalaureate College  
 6         108 ISCAM         ISI                     Baccalaureate College  
 7         108 ISCAM         ISI                     Baccalaureate College  
 8         108 ISCAM         ISI                     Baccalaureate College  
 9         108 ISCAM         ISI                     Baccalaureate College  
10         108 ISCAM         ISI                     Baccalaureate College  
# ℹ 2 more variables: instructor.section <chr>,
#   years.teaching.intro.stats <dbl>

Regular regression with a gender*instructor term

Instructors that look worse for women and better for men:

  • Instructor 99
  • Instructor 182
  • Instructor 234
  • Instructor 177

Instructors that look worse for men and better for women:

  • Instructor 40 (Female)
  • Instructor 204 (Female)
  • Instructor 95

Instructor177 had a baseline cognitive competence change of -0.44. Instructors 204, 234, and 40 were significantly different from it (204 was borderline significant), and the genderMale:Instructor204 interaction was borderline significant.

Show the code
# reference instructor: 177
# Fixed-effects-only regression with a gender x instructor interaction;
# instructor coefficients are contrasts against the reference instructor
calc_fit_reg <- lm(cognitive.competence.change ~ gender*instructor + years.teaching.intro.stats.binned, data= calc_req)

summary(calc_fit_reg)

Call:
lm(formula = cognitive.competence.change ~ gender * instructor + 
    years.teaching.intro.stats.binned, data = calc_req)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.69950 -0.53191  0.02778  0.56060  2.46900 

Coefficients: (2 not defined because of singularities)
                                        Estimate Std. Error t value Pr(>|t|)   
(Intercept)                            -0.447917   0.206071  -2.174  0.03031 * 
genderMale                              0.211075   0.279688   0.755  0.45088   
instructorInstructor182                 0.059028   0.394596   0.150  0.88116   
instructorInstructor204                 0.753472   0.394596   1.909  0.05690 . 
instructorInstructor234                 0.883083   0.435446   2.028  0.04321 * 
instructorInstructor40                  1.197917   0.460789   2.600  0.00967 **
instructorInstructor73                  0.394153   0.253738   1.553  0.12111   
instructorInstructor95                  0.314080   0.227560   1.380  0.16828   
instructorInstructor99                 -0.618750   0.422320  -1.465  0.14366   
years.teaching.intro.stats.binned10-20        NA         NA      NA       NA   
years.teaching.intro.stats.binned5-10  -0.001833   0.106108  -0.017  0.98622   
genderMale:instructorInstructor182      0.082576   0.537150   0.154  0.87790   
genderMale:instructorInstructor204     -1.099963   0.601105  -1.830  0.06800 . 
genderMale:instructorInstructor234     -0.154212   0.504065  -0.306  0.75981   
genderMale:instructorInstructor40      -0.655519   0.601105  -1.091  0.27613   
genderMale:instructorInstructor73             NA         NA      NA       NA   
genderMale:instructorInstructor95      -0.469343   0.297841  -1.576  0.11584   
genderMale:instructorInstructor99       0.286148   0.520322   0.550  0.58266   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8243 on 407 degrees of freedom
Multiple R-squared:  0.09533,   Adjusted R-squared:  0.06198 
F-statistic: 2.859 on 15 and 407 DF,  p-value: 0.0002762
Show the code
# Distinct colors, one per instructor and one per instructor gender
color_instr <- rainbow(length(unique(calc_req$instructor)))
color_instr_gender <- rainbow(length(unique(calc_req$instructor.gender)))

# Mean outcome by student gender, one trace per instructor; crossing
# (non-parallel) traces suggest a gender-by-instructor interaction
interaction.plot(x.factor = calc_req$gender, trace.factor = calc_req$instructor, response = calc_req$cognitive.competence.change,
                 xlab="Gender",
                 ylab="Cognitive Competence Change",
                 trace.label="Instructor",
                 col = color_instr)

Show the code
# Mean outcome by student gender, one trace per instructor gender
interaction.plot(x.factor = calc_req$gender, trace.factor = calc_req$instructor.gender, response = calc_req$cognitive.competence.change,
                 xlab="Gender",
                 ylab="Cognitive Competence Change",
                 trace.label="Instructor Gender", col=color_instr_gender)

Show the code
# Lookup table: each instructor's gender
calc_req|>
  select(instructor, instructor.gender)|>
  distinct()
# A tibble: 8 × 2
  instructor    instructor.gender
  <chr>         <chr>            
1 Instructor40  Female           
2 Instructor73  Female           
3 Instructor95  Male             
4 Instructor99  Male             
5 Instructor177 Male             
6 Instructor182 Male             
7 Instructor204 Female           
8 Instructor234 Male             

Significant instructors (significantly diff from 177)

Show the code
# Mean cognitive-competence change and achievement gain for the three
# instructors whose regression coefficients differed (or nearly differed)
# from the reference instructor (177).
# FIX: instructor.gender was previously also listed inside summarize() even
# though it is a grouping variable; that made summarize() return one row per
# observation, requiring a distinct() workaround. Grouping variables are kept
# in the output automatically, so the redundant column and distinct() are
# removed — the result is the same 3-row, 4-column table.
calc_req|>
  filter(instructor %in% c("Instructor204", "Instructor234", "Instructor40"))|>
  group_by(instructor, instructor.gender)|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change, na.rm = TRUE), 
            mean_ach_gain = mean(ach.gain.24, na.rm = TRUE),
            .groups='drop')
# A tibble: 3 × 4
  instructor    instructor.gender mean_cog_comp_change mean_ach_gain
  <chr>         <chr>                            <dbl>         <dbl>
1 Instructor204 Female                         -0.0500         0.363
2 Instructor234 Male                            0.477          0.403
3 Instructor40  Female                          0.483          0.101

Fitting other models to find something interesting

Show the code
# Post vs pre mastering confidence (ggplot version)
calc_req|>
  ggplot(aes(x=mastering.confidence.pre, y = mastering.confidence.post)) +
  geom_point()

Show the code
# Base-graphics version; note the axes are flipped relative to the ggplot
# above (here pre is on the y-axis)
plot(mastering.confidence.pre ~ mastering.confidence.post, data= calc_req)

Show the code
# The single row with a missing mastering.confidence.change score
calc_req|>
  filter(is.na(mastering.confidence.change))
# A tibble: 1 × 298
  instructor   section instructor.section opt.out.pre opt.out.post test.time.pre
  <chr>        <chr>   <chr>                    <dbl>        <dbl>         <dbl>
1 Instructor95 1       Instructor95_Sect…           1            1            45
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
#   q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
#   q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
#   q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
#   q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
#   q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>,
#   q29.pre.c <chr>, q30.pre.c <chr>, q31.pre.c <chr>, q32.pre.c <chr>, …
Show the code
# Check for zero mastering-confidence scores (none found)
calc_req|>
  filter(mastering.confidence.pre ==0 | mastering.confidence.post == 0)
# A tibble: 0 × 298
# ℹ 298 variables: instructor <chr>, section <chr>, instructor.section <chr>,
#   opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
#   test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
#   q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
#   q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
#   q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
#   q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …

lme notes: fixed effects are coefficients that are constant across individuals or entities; random effects vary among individuals or entities.

Model with mastering.confidence.change

How cog comp change is influenced by gender, years teaching intro stats, and mastering confidence change. Random effects structure allows the intercept and slope for gender to vary by instructor.

Random effects

* Intercept for instructor: StdDev very close to zero -> very little variability in the intercept across different instructors

  • genderMale: StdDev = 0.166 -> some variability in the effect of being male across different instructors

  • Residual: StdDev = 0.708 -> within group variation (how to better interpret?)

Fixed Effects * Intercept: Intercept is not significantly different from 0

  • genderMale: gender does not have a significant effect on cog comp change

  • years.teaching.intro.stats does not have a significant effect on cog comp change

  • mastering.confidence.change: coeff = 0.3547 and significantly affects cog comp change

Show the code
# Add mastering.confidence.change as a covariate; na.action=na.omit drops
# the one row missing that change score (422 of 423 rows modeled)
calc.fit3 <- lme(cognitive.competence.change ~ gender +  years.teaching.intro.stats.binned + mastering.confidence.change, 
                random = ~ 1+gender|instructor, data= calc_req, na.action=na.omit)
# Residuals-vs-fitted diagnostic for calc.fit3
plot(resid(calc.fit3) ~ fitted(calc.fit3), xlab = "Fitted", ylab="Residuals")
abline(h=0, lty = "dashed")

Show the code
# Normal Q-Q plot of calc.fit3 residuals
qqnorm(resid(calc.fit3), ylab="Residuals"); qqline(resid(calc.fit3), lty = "dashed")

Show the code
# Histogram of residuals: a second look at the normality assumption
hist(resid(calc.fit3), xlab = "Residuals", main = "", col = "olivedrab")

Show the code
# Full model summary: random-effect SDs, fixed-effect estimates and p-values
summary(calc.fit3)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC   logLik
  942.6339 978.9317 -462.317

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev        Corr  
(Intercept) 0.00006540548 (Intr)
genderMale  0.16648662701 0.071 
Residual    0.70849385189       

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      mastering.confidence.change 
                                            Value  Std.Error  DF   t-value
(Intercept)                             0.0277877 0.06611520 411  0.420292
genderMale                             -0.0134283 0.10965600 411 -0.122458
years.teaching.intro.stats.binned10-20 -0.1231203 0.12508382   6 -0.984302
years.teaching.intro.stats.binned5-10   0.0397746 0.07785070 411  0.510909
mastering.confidence.change             0.3547183 0.02708736 411 13.095342
                                       p-value
(Intercept)                             0.6745
genderMale                              0.9026
years.teaching.intro.stats.binned10-20  0.3630
years.teaching.intro.stats.binned5-10   0.6097
mastering.confidence.change             0.0000
 Correlation: 
                                       (Intr) gndrMl y....1 y....5
genderMale                             -0.246                     
years.teaching.intro.stats.binned10-20 -0.426 -0.209              
years.teaching.intro.stats.binned5-10  -0.693  0.030  0.357       
mastering.confidence.change             0.191 -0.009  0.004  0.005

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-3.0369771 -0.5960189  0.0232673  0.6103688  3.6986830 

Number of Observations: 422
Number of Groups: 8 

Interaction plot: why just one line?

Show the code
# Refit the calc.fit3 model on the factor-coded data so that ggpredict can
# build instructor-level predictions.
calc.fit3_fct <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + mastering.confidence.change,
  random = ~ 1 + gender | instructor,
  data = calc_factor,
  na.action = na.omit
)

# Predicted cognitive competence change by gender for a sample of 8
# instructors, including the random effects (type = "re").
plot_calc_3 <- ggpredict(
  calc.fit3_fct,
  terms = c("gender", "instructor [sample=8]"),
  type = "re"
)

ggplot(plot_calc_3, aes(x = x, y = predicted, group = group, color = group)) +
  geom_line(alpha = 0.5) +
  labs(
    y = "Cognitive Competence Change",
    x = "Female = 0, Male = 1",
    color = "instructor"
  )

genderMale*mastering.confidence.change not significant

Show the code
# Same model as calc.fit3 but with a gender x mastering.confidence.change
# interaction in the fixed effects.
calc.fit3_interaction <- lme(
  fixed = cognitive.competence.change ~
    gender * mastering.confidence.change + years.teaching.intro.stats.binned,
  random = ~ 1 + gender | instructor,
  data = calc_req,
  na.action = na.omit
)

# Diagnostic plots suppressed (same pattern as for calc.fit3):
# plot(resid(calc.fit3_interaction) ~ fitted(calc.fit3_interaction),
#      xlab = "Fitted", ylab = "Residuals")
# abline(h = 0, lty = "dashed")
# qqnorm(resid(calc.fit3_interaction), ylab = "Residuals")
# qqline(resid(calc.fit3_interaction), lty = "dashed")
# hist(resid(calc.fit3_interaction), main = "", col = "olivedrab",
#      xlab = "Residuals")

summary(calc.fit3_interaction)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC     BIC    logLik
  948.4651 988.772 -464.2326

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev        Corr  
(Intercept) 0.00002614325 (Intr)
genderMale  0.16404158301 0.062 
Residual    0.70931187429       

Fixed effects:  cognitive.competence.change ~ gender * mastering.confidence.change +      years.teaching.intro.stats.binned 
                                            Value  Std.Error  DF   t-value
(Intercept)                             0.0246704 0.06680131 410  0.369310
genderMale                             -0.0057739 0.11188205 410 -0.051607
mastering.confidence.change             0.3476056 0.03468662 410 10.021316
years.teaching.intro.stats.binned10-20 -0.1224038 0.12497709   6 -0.979410
years.teaching.intro.stats.binned5-10   0.0391039 0.07793732 410  0.501736
genderMale:mastering.confidence.change  0.0184800 0.05564323 410  0.332115
                                       p-value
(Intercept)                             0.7121
genderMale                              0.9589
mastering.confidence.change             0.0000
years.teaching.intro.stats.binned10-20  0.3652
years.teaching.intro.stats.binned5-10   0.6161
genderMale:mastering.confidence.change  0.7400
 Correlation: 
                                       (Intr) gndrMl mstr.. y....1 y....5
genderMale                             -0.269                            
mastering.confidence.change             0.233 -0.147                     
years.teaching.intro.stats.binned10-20 -0.422 -0.204  0.005              
years.teaching.intro.stats.binned5-10  -0.682  0.022  0.021  0.357       
genderMale:mastering.confidence.change -0.137  0.225 -0.624 -0.002 -0.028

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.04913940 -0.57996809  0.01516545  0.61500080  3.68972580 

Number of Observations: 422
Number of Groups: 8 

Without gender as a random effect, the gender coefficient is larger in magnitude (p = 0.107), though still not significant at the 5% level

Show the code
# Same fixed effects as calc.fit3, but with only a random intercept by
# instructor (no random gender slope).
calc.fit3_noRandGender <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + mastering.confidence.change,
  random = ~ 1 | instructor,
  data = calc_req,
  na.action = na.omit
)

summary(calc.fit3_noRandGender)
Linear mixed-effects model fit by REML
  Data: calc_req 
      AIC      BIC   logLik
  940.604 968.8356 -463.302

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.1103604 0.7105646

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      mastering.confidence.change 
                                            Value  Std.Error  DF   t-value
(Intercept)                             0.0837677 0.08988791 411  0.931913
genderMale                             -0.1178220 0.07291363 411 -1.615912
years.teaching.intro.stats.binned10-20 -0.0946135 0.14424061   6 -0.655942
years.teaching.intro.stats.binned5-10   0.0489552 0.08252204 411  0.593238
mastering.confidence.change             0.3529208 0.02719360 411 12.978082
                                       p-value
(Intercept)                             0.3519
genderMale                              0.1069
years.teaching.intro.stats.binned10-20  0.5362
years.teaching.intro.stats.binned5-10   0.5533
mastering.confidence.change             0.0000
 Correlation: 
                                       (Intr) gndrMl y....1 y....5
genderMale                             -0.330                     
years.teaching.intro.stats.binned10-20 -0.528 -0.050              
years.teaching.intro.stats.binned5-10  -0.408 -0.016  0.257       
mastering.confidence.change             0.113  0.029  0.015 -0.016

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-2.98810623 -0.59003822  0.04044726  0.61318375  3.71753447 

Number of Observations: 422
Number of Groups: 8 

Interaction plot (why one line?)

Show the code
# Interaction model (confidence x gender) with a random gender slope,
# refit on the factor-coded data for plotting.
calc.fit3_RandGender_int <- lme(
  fixed = cognitive.competence.change ~
    years.teaching.intro.stats.binned + mastering.confidence.change * gender,
  random = ~ 1 + gender | instructor,
  data = calc_factor,
  na.action = na.omit
)

# Instructor-level predictions by gender, including random effects
plot_confidence_interaction3 <- ggpredict(
  calc.fit3_RandGender_int,
  terms = c("gender", "instructor [sample=8]"),
  type = "re"
)

ggplot(plot_confidence_interaction3,
       aes(x = x, y = predicted, group = group, color = group)) +
  geom_line(alpha = 0.5) +
  labs(
    y = "Cognitive Competence Change",
    x = "Female = 0, Male = 1",
    color = "instructor"
  )

Show the code
# Interaction model with random intercept only (no random gender slope)
calc.fit3_noRandGender_int <- lme(
  fixed = cognitive.competence.change ~
    years.teaching.intro.stats.binned + mastering.confidence.change * gender,
  random = ~ 1 | instructor,
  data = calc_req,
  na.action = na.omit
)
summary(calc.fit3_noRandGender_int)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  946.3881 978.6336 -465.1941

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.1051965 0.7114321

Fixed effects:  cognitive.competence.change ~ years.teaching.intro.stats.binned +      mastering.confidence.change * gender 
                                            Value  Std.Error  DF   t-value
(Intercept)                             0.0764787 0.08897967 410  0.859507
years.teaching.intro.stats.binned10-20 -0.0928692 0.14192040   6 -0.654375
years.teaching.intro.stats.binned5-10   0.0479935 0.08234823 410  0.582811
mastering.confidence.change             0.3441004 0.03490555 410  9.858040
genderMale                             -0.1045880 0.07857583 410 -1.331046
mastering.confidence.change:genderMale  0.0232753 0.05515420 410  0.422005
                                       p-value
(Intercept)                             0.3906
years.teaching.intro.stats.binned10-20  0.5371
years.teaching.intro.stats.binned5-10   0.5603
mastering.confidence.change             0.0000
genderMale                              0.1839
mastering.confidence.change:genderMale  0.6732
 Correlation: 
                                       (Intr) y....1 y....5 mstr.. gndrMl
years.teaching.intro.stats.binned10-20 -0.520                            
years.teaching.intro.stats.binned5-10  -0.411  0.264                     
mastering.confidence.change             0.164  0.015  0.008              
genderMale                             -0.351 -0.051 -0.029 -0.213       
mastering.confidence.change:genderMale -0.118 -0.006 -0.035 -0.627  0.373

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.00492939 -0.60936531  0.03300359  0.62403105  3.70628393 

Number of Observations: 422
Number of Groups: 8 

Loop through other attitudes * all look significant * no significant interactions * Affect and Interest have the “most” significant interactions

Show the code
# Fit the same random-intercept model for each of the other attitude change
# scores, each with a gender interaction, and store every summary so they
# can be printed together afterwards.
other_attitudes_change <- c("affect.change", "difficulty.change",
                            "effort.change", "interest.change", "value.change")

# Preallocate a named list (one slot per attitude score)
summaries <- vector("list", length(other_attitudes_change))
names(summaries) <- other_attitudes_change

for (change in other_attitudes_change) {
  # reformulate() builds e.g.
  #   cognitive.competence.change ~ gender * affect.change
  # Named fit_formula rather than `formula` to avoid shadowing base::formula().
  fit_formula <- reformulate(paste("gender *", change),
                             response = "cognitive.competence.change")

  calc.fit.loop <- lme(fit_formula, random = ~ 1 | instructor,
                       data = calc_req, na.action = na.omit)
  summaries[[change]] <- summary(calc.fit.loop)
}

for (change in other_attitudes_change) {
  print(summaries[[change]])
}
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC     BIC    logLik
  824.1318 848.359 -406.0659

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:  0.08192137 0.6198973

Fixed effects:  list(formula) 
                              Value  Std.Error  DF   t-value p-value
(Intercept)              -0.0330696 0.05844999 412 -0.565776  0.5719
genderMale               -0.0871045 0.06459843 412 -1.348400  0.1783
affect.change             0.6442851 0.04132587 412 15.590357  0.0000
genderMale:affect.change -0.0999716 0.06499968 412 -1.538033  0.1248
 Correlation: 
                         (Intr) gndrMl affct.
genderMale               -0.475              
affect.change             0.063 -0.078       
genderMale:affect.change -0.064  0.205 -0.629

Standardized Within-Group Residuals:
       Min         Q1        Med         Q3        Max 
-2.9862456 -0.6266261  0.1209534  0.6497055  3.0009020 

Number of Observations: 423
Number of Groups: 8 
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  1001.734 1025.961 -494.8671

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.2789162 0.7609141

Fixed effects:  list(formula) 
                                  Value  Std.Error  DF   t-value p-value
(Intercept)                  -0.1287321 0.12217457 412 -1.053674  0.2926
genderMale                   -0.1033518 0.07995907 412 -1.292559  0.1969
difficulty.change             0.5345103 0.07048952 412  7.582834  0.0000
genderMale:difficulty.change -0.1043797 0.11365716 412 -0.918373  0.3590
 Correlation: 
                             (Intr) gndrMl dffcl.
genderMale                   -0.318              
difficulty.change            -0.095  0.096       
genderMale:difficulty.change  0.030 -0.055 -0.605

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.62432245 -0.60982704  0.01414437  0.66459365  2.93700352 

Number of Observations: 423
Number of Groups: 8 
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC     BIC    logLik
  1061.193 1085.42 -524.5965

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.3205431 0.8149323

Fixed effects:  list(formula) 
                               Value  Std.Error  DF    t-value p-value
(Intercept)               0.08087320 0.14408229 412  0.5612987  0.5749
genderMale               -0.10833797 0.11722934 412 -0.9241541  0.3559
effort.change             0.13457046 0.05083584 412  2.6471574  0.0084
genderMale:effort.change  0.07015743 0.09188282 412  0.7635533  0.4456
 Correlation: 
                         (Intr) gndrMl effrt.
genderMale               -0.354              
effort.change             0.311 -0.390       
genderMale:effort.change -0.212  0.685 -0.551

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.38751981 -0.62868174  0.02309865  0.68027605  2.99375762 

Number of Observations: 423
Number of Groups: 8 
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC     BIC    logLik
  1025.402 1049.63 -506.7012

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.2897189 0.7814186

Fixed effects:  list(formula) 
                                 Value  Std.Error  DF   t-value p-value
(Intercept)                 0.12449583 0.12937907 412  0.962256  0.3365
genderMale                 -0.09332598 0.09270482 412 -1.006700  0.3147
interest.change             0.25009316 0.04980093 412  5.021857  0.0000
genderMale:interest.change  0.12936340 0.08683203 412  1.489812  0.1370
 Correlation: 
                           (Intr) gndrMl intrs.
genderMale                 -0.329              
interest.change             0.233 -0.262       
genderMale:interest.change -0.139  0.471 -0.566

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.62657610 -0.64211979  0.02605822  0.66592010  2.79926989 

Number of Observations: 423
Number of Groups: 8 
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC     BIC    logLik
  972.6517 996.879 -480.3259

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.3001982 0.7337543

Fixed effects:  list(formula) 
                             Value  Std.Error  DF   t-value p-value
(Intercept)              0.0519067 0.12712010 412  0.408328  0.6832
genderMale              -0.1404155 0.07870246 412 -1.784131  0.0751
value.change             0.5388321 0.05914037 412  9.111071  0.0000
genderMale:value.change -0.0336186 0.10686670 412 -0.314585  0.7532
 Correlation: 
                        (Intr) gndrMl vl.chn
genderMale              -0.296              
value.change             0.080 -0.087       
genderMale:value.change -0.071  0.209 -0.550

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.77169565 -0.58074059 -0.01509698  0.67853073  2.33993508 

Number of Observations: 423
Number of Groups: 8 

Model with textbook classification

Random effects:

  • Intercept: StdDev = 0.2181 -> some variability in cog comp change can be attributed to differences between instructors

  • Residuals: StdDev = 0.8274 -> variation in cog comp change within instructors not explained by fixed effects

Fixed effects:

  • Intercept: baseline level of cog comp change for reference group (females with instructors with less than 5 years of teaching) -> not significant

  • genderMale: coeff = -0.1882, p = 0.0305 -> being male has a significant negative effect on cog comp change

  • years teaching intro stats (10-20) does not have a significant effect on cog comp change compared with those with 0-5 years experience

  • years teaching intro stats (5-10) has a significant positive effect on cog comp change compared with those with 0-5 years experience

  • NotSBI, NotSBI2, and Other aren’t different from ISI but OtherSBI is ???

There are gender differences among students using the same textbook

Show the code
# Add textbook classification to the model; fit on the data set where
# missing gender was recoded to "NA/Other" (calc_req_NA).
calc.fit5 <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + textbook.classification,
  random = ~ 1 | instructor,
  data = calc_req_NA,
  na.action = na.omit
)

summary(calc.fit5)
Linear mixed-effects model fit by REML
  Data: calc_req_NA 
       AIC      BIC    logLik
  1073.381 1117.692 -525.6903

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.2408299 0.8265278

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      textbook.classification 
                                            Value Std.Error  DF    t-value
(Intercept)                            -0.1604085 0.2587134 412 -0.6200238
genderMale                             -0.1882161 0.0866918 412 -2.1710936
genderNA/Other                         -0.0072920 0.8291315 412 -0.0087947
years.teaching.intro.stats.binned10-20  0.0267763 0.3376069   3  0.0793121
years.teaching.intro.stats.binned5-10   0.7831209 0.3950516 412  1.9823254
textbook.classificationNotSBI          -0.1309499 0.3316226   3 -0.3948763
textbook.classificationNotSBI2          0.7566714 0.4387838   3  1.7244744
textbook.classificationOther            0.0042100 0.3982526   3  0.0105713
textbook.classificationOtherSBI        -0.7820871 0.3897670 412 -2.0065501
                                       p-value
(Intercept)                             0.5356
genderMale                              0.0305
genderNA/Other                          0.9930
years.teaching.intro.stats.binned10-20  0.9418
years.teaching.intro.stats.binned5-10   0.0481
textbook.classificationNotSBI           0.7193
textbook.classificationNotSBI2          0.1831
textbook.classificationOther            0.9922
textbook.classificationOtherSBI         0.0455
 Correlation: 
                                       (Intr) gndrMl gnNA/O y....1 y....5
genderMale                             -0.127                            
genderNA/Other                         -0.004  0.035                     
years.teaching.intro.stats.binned10-20 -0.750 -0.028 -0.001              
years.teaching.intro.stats.binned5-10  -0.633 -0.086 -0.003  0.496       
textbook.classificationNotSBI          -0.770  0.016  0.001  0.588  0.501
textbook.classificationNotSBI2         -0.575 -0.044 -0.002  0.446  0.384
textbook.classificationOther            0.001 -0.011  0.000 -0.357  0.001
textbook.classificationOtherSBI         0.563  0.097 -0.007 -0.443 -0.963
                                       tx.NSBI t.NSBI2 txtb.O
genderMale                                                   
genderNA/Other                                               
years.teaching.intro.stats.binned10-20                       
years.teaching.intro.stats.binned5-10                        
textbook.classificationNotSBI                                
textbook.classificationNotSBI2          0.452                
textbook.classificationOther            0.000   0.000        
textbook.classificationOtherSBI        -0.447  -0.343  -0.001

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-3.235170712 -0.612511978 -0.007571671  0.623440822  3.015879131 

Number of Observations: 424
Number of Groups: 8 

Random slopes (textbook classification)

Show the code
# Refit the textbook model on the factor-coded data for plotting.
# Fixed a stray doubled "+" in the original formula
# ("gender + + textbook.classification").
calc.fit5_fct <- lme(
  fixed = cognitive.competence.change ~ gender + textbook.classification +
    years.teaching.intro.stats.binned,
  random = ~ 1 | instructor,
  data = calc_factor,
  na.action = na.omit
)

# Predictions by gender and textbook classification for a sample of 8
# instructors, including random effects.
calc5_plot <- ggpredict(
  calc.fit5_fct,
  terms = c("gender", "textbook.classification", "instructor [sample=8]"),
  type = "re"
)

ggplot(calc5_plot, aes(x = x, y = predicted, group = group, color = group)) +
  geom_line() +
  labs(
    y = "Cognitive Competence Change",
    x = "Female = 0, Male = 1",
    color = "textbook classification"
  )

Investigate “imbalance” of gender across textbooks. Proportions of males for each textbook classification:

Show the code
# List the textbook classification levels present in calc_req_NA
unique(calc_req_NA$textbook.classification)
[1] "NotSBI2"  "NotSBI"   "OtherSBI" "ISI"      "Other"   
Show the code
# Counts and proportion of male students per textbook classification
# (prop computed directly in summarize; calc_req_NA presumably has gender
# recoded so there are no NAs to drop — confirm against its construction)
calc_req_NA |>
  group_by(textbook.classification) |>
  summarize(
    total = n(),
    num_males = sum(gender == "Male"),
    prop_males = num_males / total
  )
# A tibble: 5 × 4
  textbook.classification total num_males prop_males
  <chr>                   <int>     <int>      <dbl>
1 ISI                       154        73      0.474
2 NotSBI                     48        12      0.25 
3 NotSBI2                    10         6      0.6  
4 Other                      13         7      0.538
5 OtherSBI                  199        67      0.337

With textbook.classification*gender, genderMale p = 0.1770

No significant interaction terms

There is some imbalance of gender across the textbooks, but not enough to be significant?

Show the code
# Textbook x gender interaction model. The original formula listed the
# gender main effect twice (once bare and once via
# textbook.classification * gender); R silently deduplicates, so this
# explicit form fits the identical model with the same coefficient order.
calc.fit5_interaction <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + textbook.classification +
    gender:textbook.classification,
  random = ~ 1 | instructor,
  data = calc_req,
  na.action = na.omit
)

summary(calc.fit5_interaction)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  1077.927 1134.188 -524.9636

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.4228149 0.8269005

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      textbook.classification * gender 
                                                Value Std.Error  DF    t-value
(Intercept)                                -0.1605701 0.4352078 408 -0.3689504
genderMale                                 -0.1877899 0.1388554 408 -1.3524129
years.teaching.intro.stats.binned10-20      0.0433712 0.5448395   3  0.0796036
years.teaching.intro.stats.binned5-10       0.7829532 0.6320353 408  1.2387807
textbook.classificationNotSBI              -0.2871487 0.5586817   3 -0.5139754
textbook.classificationNotSBI2              0.9105701 0.7342475   3  1.2401405
textbook.classificationOther               -0.2716900 0.6381124   3 -0.4257713
textbook.classificationOtherSBI            -0.7674213 0.6366768 408 -1.2053544
genderMale:textbook.classificationNotSBI    0.4167518 0.4204712 408  0.9911542
genderMale:textbook.classificationNotSBI2  -0.2566546 0.5515276 408 -0.4653522
genderMale:textbook.classificationOther     0.4814407 0.4805437 408  1.0018666
genderMale:textbook.classificationOtherSBI -0.0427938 0.1862951 408 -0.2297100
                                           p-value
(Intercept)                                 0.7124
genderMale                                  0.1770
years.teaching.intro.stats.binned10-20      0.9416
years.teaching.intro.stats.binned5-10       0.2161
textbook.classificationNotSBI               0.6427
textbook.classificationNotSBI2              0.3031
textbook.classificationOther                0.6990
textbook.classificationOtherSBI             0.2288
genderMale:textbook.classificationNotSBI    0.3222
genderMale:textbook.classificationNotSBI2   0.6419
genderMale:textbook.classificationOther     0.3170
genderMale:textbook.classificationOtherSBI  0.8184
 Correlation: 
                                           (Intr) gndrMl y....1 y....5 tx.NSBI
genderMale                                 -0.121                             
years.teaching.intro.stats.binned10-20     -0.784 -0.025                      
years.teaching.intro.stats.binned5-10      -0.668 -0.086  0.544               
textbook.classificationNotSBI              -0.779  0.094  0.611  0.520        
textbook.classificationNotSBI2             -0.593  0.072  0.465  0.396  0.462 
textbook.classificationOther               -0.013  0.104 -0.319 -0.009  0.010 
textbook.classificationOtherSBI             0.625  0.169 -0.520 -0.980 -0.487 
genderMale:textbook.classificationNotSBI    0.040 -0.330  0.008  0.029 -0.258 
genderMale:textbook.classificationNotSBI2   0.030 -0.252  0.006  0.022 -0.024 
genderMale:textbook.classificationOther     0.035 -0.289  0.007  0.025 -0.027 
genderMale:textbook.classificationOtherSBI  0.090 -0.745  0.019  0.064 -0.070 
                                           t.NSBI2 txtb.O t.OSBI gnM:.NSBI
genderMale                                                                
years.teaching.intro.stats.binned10-20                                    
years.teaching.intro.stats.binned5-10                                     
textbook.classificationNotSBI                                             
textbook.classificationNotSBI2                                            
textbook.classificationOther                0.007                         
textbook.classificationOtherSBI            -0.370   0.018                 
genderMale:textbook.classificationNotSBI   -0.024  -0.034 -0.056          
genderMale:textbook.classificationNotSBI2  -0.440  -0.026 -0.042  0.083   
genderMale:textbook.classificationOther    -0.021  -0.402 -0.049  0.095   
genderMale:textbook.classificationOtherSBI -0.053  -0.078 -0.170  0.246   
                                           gM:.NSBI2 gnM:.O
genderMale                                                 
years.teaching.intro.stats.binned10-20                     
years.teaching.intro.stats.binned5-10                      
textbook.classificationNotSBI                              
textbook.classificationNotSBI2                             
textbook.classificationOther                               
textbook.classificationOtherSBI                            
genderMale:textbook.classificationNotSBI                   
genderMale:textbook.classificationNotSBI2                  
genderMale:textbook.classificationOther     0.073          
genderMale:textbook.classificationOtherSBI  0.188     0.215

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-3.25105021 -0.61204041  0.01817228  0.65580869  2.99718212 

Number of Observations: 423
Number of Groups: 8 

Applying to whole dataset:

Turning some vars of allYrsFinal into factors

Show the code
# Convert the modeling variables to factors in one pass so that lme and
# ggpredict treat them as categorical with explicit levels.
allYrsFinal_factor <- allYrsFinal |>
  mutate(across(
    c(gender, instructor.gender, years.teaching.intro.stats.binned, instructor),
    factor
  ))

Gender, textbook classification, years teaching intro stats

Show the code
# Whole-dataset model: gender, binned years teaching intro stats, and
# textbook classification, with a random intercept by instructor.
textbook.fit <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + textbook.classification,
  random = ~ 1 | instructor,
  data = allYrsFinal,
  na.action = na.omit
)

summary(textbook.fit)
Linear mixed-effects model fit by REML
  Data: allYrsFinal 
       AIC      BIC    logLik
  31311.83 31407.41 -15642.92

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.2128222 0.9281217

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      textbook.classification 
                                             Value  Std.Error    DF   t-value
(Intercept)                             0.11953057 0.04364982 11302  2.738398
genderMale                             -0.09575831 0.01841568 11302 -5.199825
years.teaching.intro.stats.binned10-20 -0.02099960 0.04517081 11302 -0.464893
years.teaching.intro.stats.binned20-30 -0.02374339 0.06811487 11302 -0.348579
years.teaching.intro.stats.binned30+    0.04461754 0.14446465   224  0.308847
years.teaching.intro.stats.binned5-10   0.03385650 0.04395613 11302  0.770234
textbook.classificationISI1st          -0.15001502 0.05493475 11302 -2.730786
textbook.classificationNotSBI          -0.09933388 0.04956882 11302 -2.003959
textbook.classificationNotSBI2         -0.20584842 0.06579719 11302 -3.128529
textbook.classificationOther           -0.06546614 0.10042603 11302 -0.651884
textbook.classificationOtherSBI        -0.11191696 0.04724361 11302 -2.368933
                                       p-value
(Intercept)                             0.0062
genderMale                              0.0000
years.teaching.intro.stats.binned10-20  0.6420
years.teaching.intro.stats.binned20-30  0.7274
years.teaching.intro.stats.binned30+    0.7577
years.teaching.intro.stats.binned5-10   0.4412
textbook.classificationISI1st           0.0063
textbook.classificationNotSBI           0.0451
textbook.classificationNotSBI2          0.0018
textbook.classificationOther            0.5145
textbook.classificationOtherSBI         0.0179
 Correlation: 
                                       (Intr) gndrMl y....1 y....2 y....3
genderMale                             -0.153                            
years.teaching.intro.stats.binned10-20 -0.412  0.002                     
years.teaching.intro.stats.binned20-30 -0.315 -0.002  0.333              
years.teaching.intro.stats.binned30+   -0.064  0.006  0.099  0.066       
years.teaching.intro.stats.binned5-10  -0.199  0.005  0.295  0.183  0.089
textbook.classificationISI1st          -0.578 -0.012  0.093  0.107 -0.003
textbook.classificationNotSBI          -0.660 -0.017  0.014  0.054 -0.088
textbook.classificationNotSBI2         -0.434  0.000 -0.061 -0.005 -0.010
textbook.classificationOther           -0.350  0.009  0.005  0.076  0.006
textbook.classificationOtherSBI        -0.734  0.002  0.168  0.142 -0.041
                                       y....5 t.ISI1 tx.NSBI t.NSBI2 txtb.O
genderMale                                                                 
years.teaching.intro.stats.binned10-20                                     
years.teaching.intro.stats.binned20-30                                     
years.teaching.intro.stats.binned30+                                       
years.teaching.intro.stats.binned5-10                                      
textbook.classificationISI1st          -0.082                              
textbook.classificationNotSBI          -0.107  0.493                       
textbook.classificationNotSBI2         -0.164  0.369  0.424                
textbook.classificationOther            0.073  0.236  0.281   0.191        
textbook.classificationOtherSBI        -0.052  0.587  0.638   0.415   0.320

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-5.33581741 -0.58706808  0.03741069  0.62501286  5.81740605 

Number of Observations: 11537
Number of Groups: 226 

Interaction plot Lines look pretty parallel

Show the code
# Refit the whole-dataset textbook model on the factor-coded data so
# ggpredict can produce instructor-level predictions.
textbook.fit_fct <- lme(
  fixed = cognitive.competence.change ~ gender +
    years.teaching.intro.stats.binned + textbook.classification,
  random = ~ 1 | instructor,
  data = allYrsFinal_factor,
  na.action = na.omit
)

# Predictions by gender and textbook for a sample of 8 instructors,
# including random effects.
textbook_plot <- ggpredict(
  textbook.fit_fct,
  terms = c("gender", "textbook.classification", "instructor [sample=8]"),
  type = "re"
)

ggplot(textbook_plot, aes(x = x, y = predicted, group = group, color = group)) +
  geom_line() +
  labs(
    y = "Cognitive Competence Change",
    x = "Female = 0, Male = 1",
    color = "Textbook"
  )

Proportion of males for each textbook

Show the code
# Counts and proportion of male students per textbook classification on the
# whole dataset; na.rm = TRUE excludes missing genders from the male count
# while keeping those rows in the total.
allYrsFinal |>
  group_by(textbook.classification) |>
  summarize(
    total = n(),
    num_males = sum(gender == "Male", na.rm = TRUE),
    prop_males = num_males / total
  )
# A tibble: 7 × 4
  textbook.classification total num_males prop_males
  <chr>                   <int>     <int>      <dbl>
1 ISI                      2381       808      0.339
2 ISI1st                    860       344      0.4  
3 NotSBI                   3966      1465      0.369
4 NotSBI2                   796       273      0.343
5 Other                     266        91      0.342
6 OtherSBI                 3450      1206      0.350
7 <NA>                       11         2      0.182

Significant terms in this model: gender, the 5-10 and 10-20 years-teaching bins, and the NotSBI and OtherSBI textbook classifications. (In the interaction model below, genderMale:textbook.classificationNotSBI and genderMale:textbook.classificationOther are also of interest.)

Show the code
# Random-slope model: allows the gender gap in cognitive competence change
# to vary by instructor (random intercept + random genderMale slope).
textbook.fit_randGender <- lme(cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +  textbook.classification, 
                random = ~ 1+gender|instructor, data= allYrsFinal, na.action=na.omit)

summary(textbook.fit_randGender)
Linear mixed-effects model fit by REML
  Data: allYrsFinal 
       AIC      BIC    logLik
  31308.45 31418.74 -15639.23

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev     Corr  
(Intercept) 0.23472938 (Intr)
genderMale  0.09207456 -0.693
Residual    0.92720614       

Fixed effects:  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned +      textbook.classification 
                                             Value  Std.Error    DF   t-value
(Intercept)                             0.10810141 0.04387651 11302  2.463765
genderMale                             -0.09559130 0.02096547 11302 -4.559463
years.teaching.intro.stats.binned10-20 -0.02626291 0.04453107 11302 -0.589766
years.teaching.intro.stats.binned20-30 -0.02011840 0.06639049 11302 -0.303031
years.teaching.intro.stats.binned30+    0.05507746 0.13890797   224  0.396503
years.teaching.intro.stats.binned5-10   0.03594135 0.04355139 11302  0.825263
textbook.classificationISI1st          -0.13506991 0.05449194 11302 -2.478713
textbook.classificationNotSBI          -0.08253138 0.04879886 11302 -1.691256
textbook.classificationNotSBI2         -0.19268328 0.06520312 11302 -2.955124
textbook.classificationOther           -0.06250709 0.09996904 11302 -0.625264
textbook.classificationOtherSBI        -0.10260441 0.04674183 11302 -2.195130
                                       p-value
(Intercept)                             0.0138
genderMale                              0.0000
years.teaching.intro.stats.binned10-20  0.5554
years.teaching.intro.stats.binned20-30  0.7619
years.teaching.intro.stats.binned30+    0.6921
years.teaching.intro.stats.binned5-10   0.4092
textbook.classificationISI1st           0.0132
textbook.classificationNotSBI           0.0908
textbook.classificationNotSBI2          0.0031
textbook.classificationOther            0.5318
textbook.classificationOtherSBI         0.0282
 Correlation: 
                                       (Intr) gndrMl y....1 y....2 y....3
genderMale                             -0.244                            
years.teaching.intro.stats.binned10-20 -0.401 -0.013                     
years.teaching.intro.stats.binned20-30 -0.299 -0.041  0.333              
years.teaching.intro.stats.binned30+   -0.056 -0.027  0.102  0.070       
years.teaching.intro.stats.binned5-10  -0.198  0.000  0.295  0.186  0.091
textbook.classificationISI1st          -0.567  0.001  0.092  0.105 -0.003
textbook.classificationNotSBI          -0.646 -0.009  0.008  0.044 -0.088
textbook.classificationNotSBI2         -0.430  0.017 -0.059 -0.006 -0.010
textbook.classificationOther           -0.345  0.015  0.008  0.076  0.006
textbook.classificationOtherSBI        -0.720 -0.001  0.169  0.139 -0.044
                                       y....5 t.ISI1 tx.NSBI t.NSBI2 txtb.O
genderMale                                                                 
years.teaching.intro.stats.binned10-20                                     
years.teaching.intro.stats.binned20-30                                     
years.teaching.intro.stats.binned30+                                       
years.teaching.intro.stats.binned5-10                                      
textbook.classificationISI1st          -0.082                              
textbook.classificationNotSBI          -0.106  0.490                       
textbook.classificationNotSBI2         -0.155  0.367  0.422                
textbook.classificationOther            0.074  0.234  0.279   0.189        
textbook.classificationOtherSBI        -0.050  0.581  0.636   0.412   0.317

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-5.40748941 -0.58117379  0.03151791  0.62807530  5.84705080 

Number of Observations: 11537
Number of Groups: 226 
Show the code
# Random-slope model refit on the factor-coded data for ggpredict.
textbook.fit_randGender_fct <- lme(
  cognitive.competence.change ~ gender + years.teaching.intro.stats.binned + textbook.classification,
  random = ~ 1 + gender | instructor,
  data = allYrsFinal_factor,
  na.action = na.omit
)

# Group-level predictions for a sample of 8 instructors.
textbook_plot2 <- ggpredict(
  textbook.fit_randGender_fct,
  terms = c("gender", "textbook.classification", "instructor [sample=8]"),
  type = "re"
)

ggplot(textbook_plot2, aes(x = x, y = predicted, color = group, group = group)) +
  geom_line() +
  labs(x = "Female = 0, Male = 1", y = "Cognitive Competence Change", color = "Textbook")

Regular regression: gender*textbook.classification

Show the code
# Ordinary least squares with a gender x textbook interaction, ignoring
# instructor-level clustering (compare against the lme fits above).
reg_text.fit <- lm(cognitive.competence.change~gender*textbook.classification + years.teaching.intro.stats.binned + instructor.gender, data = allYrsFinal)

summary(reg_text.fit)

Call:
lm(formula = cognitive.competence.change ~ gender * textbook.classification + 
    years.teaching.intro.stats.binned + instructor.gender, data = allYrsFinal)

Residuals:
    Min      1Q  Median      3Q     Max 
-5.4433 -0.5608  0.0501  0.5848  5.3480 

Coefficients:
                                           Estimate Std. Error t value
(Intercept)                                 0.28293    0.03003   9.423
genderMale                                 -0.21058    0.04161  -5.061
textbook.classificationISI1st              -0.27745    0.04926  -5.632
textbook.classificationNotSBI              -0.29761    0.03209  -9.275
textbook.classificationNotSBI2             -0.38358    0.04858  -7.896
textbook.classificationOther               -0.09346    0.07693  -1.215
textbook.classificationOtherSBI            -0.20103    0.03265  -6.158
years.teaching.intro.stats.binned10-20     -0.03542    0.02457  -1.441
years.teaching.intro.stats.binned20-30     -0.06443    0.02772  -2.324
years.teaching.intro.stats.binned30+        0.17214    0.05057   3.404
years.teaching.intro.stats.binned5-10       0.02141    0.02665   0.803
instructor.genderMale                      -0.06794    0.01930  -3.521
genderMale:textbook.classificationISI1st    0.23176    0.07801   2.971
genderMale:textbook.classificationNotSBI    0.18186    0.05202   3.496
genderMale:textbook.classificationNotSBI2   0.13923    0.08210   1.696
genderMale:textbook.classificationOther     0.02400    0.13056   0.184
genderMale:textbook.classificationOtherSBI  0.07203    0.05368   1.342
                                                       Pr(>|t|)    
(Intercept)                                < 0.0000000000000002 ***
genderMale                                  0.00000042408515175 ***
textbook.classificationISI1st               0.00000001825396782 ***
textbook.classificationNotSBI              < 0.0000000000000002 ***
textbook.classificationNotSBI2              0.00000000000000315 ***
textbook.classificationOther                           0.224441    
textbook.classificationOtherSBI             0.00000000076324727 ***
years.teaching.intro.stats.binned10-20                 0.149506    
years.teaching.intro.stats.binned20-30                 0.020128 *  
years.teaching.intro.stats.binned30+                   0.000667 ***
years.teaching.intro.stats.binned5-10                  0.421730    
instructor.genderMale                                  0.000432 ***
genderMale:textbook.classificationISI1st               0.002977 ** 
genderMale:textbook.classificationNotSBI               0.000475 ***
genderMale:textbook.classificationNotSBI2              0.089946 .  
genderMale:textbook.classificationOther                0.854134    
genderMale:textbook.classificationOtherSBI             0.179669    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9458 on 11520 degrees of freedom
  (193 observations deleted due to missingness)
Multiple R-squared:  0.01564,   Adjusted R-squared:  0.01427 
F-statistic: 11.44 on 16 and 11520 DF,  p-value: < 0.00000000000000022
Show the code
# Interaction plot of mean cognitive competence change by gender and
# textbook classification.
# FIX: unique() counts NA as a level (7 here) while interaction.plot drops
# NA from the trace factor, and the old hard cap of 6 would silently
# recycle colors if more classifications appeared. Count non-NA levels and
# clamp the request into brewer.pal's valid range for Dark2 (3-8 colors).
num_classes <- length(unique(na.omit(allYrsFinal$textbook.classification)))
textbook_colors <- brewer.pal(max(3, min(num_classes, 8)), "Dark2")

interaction.plot(x.factor=allYrsFinal$gender, trace.factor = allYrsFinal$textbook.classification, response = allYrsFinal$cognitive.competence.change,
                 xlab = "Gender",
                 ylab = "Cognitive Competence Change",
                 trace.label = "Textbook Classification",
                 col = textbook_colors)

Predicting achievement gain (ach.gain.24)

Random effects

  • Intercept StdDev = 0.1802149 -> some variability in ach.gain.24 between different instructors
  • Residual StdDev = 0.2890601

Fixed effects

  • genderMale has a positive significant effect on ach.gain.24 when compared to females
  • genderNA/Other does not have a significant effect on ach.gain.24 compared to females
  • years.teaching.intro.stats.binned10-20: does not have a significant effect on ach.gain.24 compared to 0-5 years
  • years.teaching.intro.stats.binned5-10 has a significant negative effect on ach.gain.24 compared to 0-5 years
  • cognitive.competence.change has a positive and significant effect on ach.gain.24
Show the code
# Predict achievement gain from gender, binned teaching experience, and
# cognitive competence change, random intercept per instructor.
# NOTE(review): calc_req_NA is presumably the calc-prerequisite subset
# with gender NAs recoded to "NA/Other" (as done for allYrsFinal_NA at
# the top of the file) — confirm upstream.
calc.fit6 <- lme(ach.gain.24 ~ gender + years.teaching.intro.stats.binned + cognitive.competence.change, 
                random = ~ 1|instructor, data= calc_req_NA)

summary(calc.fit6)
Linear mixed-effects model fit by REML
  Data: calc_req_NA 
       AIC      BIC    logLik
  202.4965 234.7803 -93.24824

Random effects:
 Formula: ~1 | instructor
        (Intercept)  Residual
StdDev:   0.1802149 0.2890601

Fixed effects:  ach.gain.24 ~ gender + years.teaching.intro.stats.binned + cognitive.competence.change 
                                             Value  Std.Error  DF   t-value
(Intercept)                             0.20507282 0.08745810 412  2.344812
genderMale                              0.06597470 0.03061753 412  2.154801
genderNA/Other                         -0.27490222 0.28997011 412 -0.948036
years.teaching.intro.stats.binned10-20  0.04498717 0.14182863   6  0.317194
years.teaching.intro.stats.binned5-10  -0.12786515 0.03656786 412 -3.496654
cognitive.competence.change             0.03576860 0.01713373 412  2.087614
                                       p-value
(Intercept)                             0.0195
genderMale                              0.0318
genderNA/Other                          0.3437
years.teaching.intro.stats.binned10-20  0.7618
years.teaching.intro.stats.binned5-10   0.0005
cognitive.competence.change             0.0374
 Correlation: 
                                       (Intr) gndrMl gnNA/O y....1 y....5
genderMale                             -0.170                            
genderNA/Other                         -0.003  0.036                     
years.teaching.intro.stats.binned10-20 -0.599  0.000 -0.002              
years.teaching.intro.stats.binned5-10  -0.152  0.026 -0.038  0.091       
cognitive.competence.change            -0.011  0.093  0.000  0.022 -0.007

Standardized Within-Group Residuals:
        Min          Q1         Med          Q3         Max 
-5.05758716 -0.54758474  0.04772822  0.64230049  2.74008911 

Number of Observations: 424
Number of Groups: 8 

Interaction between gender and pre?

Show the code
# Reverse direction: does achievement gain predict cognitive competence
# change? Random intercept and random gender slope per instructor.
calc.fit7 <- lme(cognitive.competence.change ~ gender + ach.gain.24 + textbook.classification, 
                random = ~ 1+gender|instructor, data= calc_req, na.action=na.omit)

summary(calc.fit7)
Linear mixed-effects model fit by REML
  Data: calc_req 
       AIC      BIC    logLik
  1074.348 1118.685 -526.1738

Random effects:
 Formula: ~1 + gender | instructor
 Structure: General positive-definite, Log-Cholesky parametrization
            StdDev    Corr  
(Intercept) 0.3766085 (Intr)
genderMale  0.1875762 -0.337
Residual    0.8210504       

Fixed effects:  cognitive.competence.change ~ gender + ach.gain.24 + textbook.classification 
                                     Value Std.Error  DF    t-value p-value
(Intercept)                     -0.1079576 0.2192687 412 -0.4923532  0.6227
genderMale                      -0.1134483 0.1385524 412 -0.8188110  0.4134
ach.gain.24                      0.2855871 0.1393892 412  2.0488463  0.0411
textbook.classificationNotSBI   -0.2710299 0.3512820   4 -0.7715451  0.4834
textbook.classificationNotSBI2   0.6282174 0.4833295   4  1.2997705  0.2635
textbook.classificationOther    -0.0531318 0.4687671   4 -0.1133437  0.9152
textbook.classificationOtherSBI  0.0279503 0.1061041 412  0.2634231  0.7924
 Correlation: 
                                (Intr) gndrMl ac..24 tx.NSBI t.NSBI2 txtb.O
genderMale                      -0.369                                     
ach.gain.24                     -0.223 -0.050                              
textbook.classificationNotSBI   -0.563  0.095  0.098                       
textbook.classificationNotSBI2  -0.383 -0.005  0.081  0.236                
textbook.classificationOther    -0.407  0.008  0.113  0.248   0.180        
textbook.classificationOtherSBI -0.141  0.002  0.189  0.079   0.058   0.066

Standardized Within-Group Residuals:
         Min           Q1          Med           Q3          Max 
-3.193750602 -0.602104601 -0.006958229  0.666769643  3.099360142 

Number of Observations: 423
Number of Groups: 8 

Trying K-means clustering?

Show the code
# Change vs. pre score, colored by gender and shaped by textbook.
ggplot(calc_req,
       aes(x = cognitive.competence.pre,
           y = cognitive.competence.change,
           color = gender,
           shape = textbook.classification)) +
  geom_point(alpha = 0.5)

Show the code
# Clustering features: textbook classification (to be one-hot encoded)
# plus the two numeric competence measures, complete cases only.
calc_cols <- calc_req |>
  select(textbook.classification, cognitive.competence.change, cognitive.competence.pre) |>
  filter(!is.na(textbook.classification), !is.na(cognitive.competence.change), !is.na(cognitive.competence.pre))

# One-hot encoding.
# BUG FIX: the previous pivot_wider() had no row identifier, so rows with
# identical (change, pre) pairs were merged and values_fn = length
# produced counts > 1 instead of 0/1 indicators (visible in the old
# cluster means, e.g. OtherSBI = 2.05, ISI = 1.70, and only 235 rows
# survived out of 423). A per-row id keeps one row per observation and
# yields true 0/1 dummies.
calc_cols <- calc_cols |>
  mutate(row_id = row_number(),
         textbook.classification = factor(textbook.classification)) |>
  pivot_wider(names_from = textbook.classification,
              values_from = textbook.classification,
              values_fn = length, values_fill = 0) |>
  select(-row_id)

set.seed(123)
# NOTE(review): features are unscaled, so the numeric columns dominate
# the 0/1 dummies in the Euclidean distance — consider scale().
km.out <- kmeans(calc_cols, centers = 4, nstart = 20)
km.out
K-means clustering with 4 clusters of sizes 85, 56, 41, 53

Cluster means:
  cognitive.competence.change cognitive.competence.pre    NotSBI2    NotSBI
1                 -1.02941176                 5.825490 0.01176471 0.3176471
2                  0.88690476                 4.244048 0.07142857 0.1428571
3                 -0.39430894                 4.528455 0.00000000 0.1463415
4                 -0.01886792                 4.858491 0.09433962 0.1320755
   OtherSBI       ISI      Other
1 0.4941176 0.4235294 0.04705882
2 0.7500000 0.2857143 0.07142857
3 2.0487805 0.2926829 0.02439024
4 0.5660377 1.6981132 0.07547170

Clustering vector:
  [1] 4 2 2 1 4 4 4 4 2 1 3 1 1 1 4 1 1 2 1 1 3 3 2 2 2 2 1 3 4 1 1 4 1 1 1 3 1
 [38] 3 1 3 3 3 2 1 2 3 2 3 2 2 3 1 2 1 2 2 1 3 4 3 2 1 1 2 3 1 1 1 4 2 4 1 1 4
 [75] 1 2 1 3 1 3 2 3 4 4 4 3 3 1 2 4 3 1 4 2 3 3 1 1 1 3 2 1 4 1 1 2 3 1 3 1 4
[112] 3 1 3 3 3 3 4 1 4 1 1 1 3 1 2 1 1 2 2 4 1 1 1 2 3 4 2 2 2 2 4 2 2 3 3 3 2
[149] 3 4 1 1 1 3 2 4 1 4 2 2 3 2 3 2 2 1 1 3 2 1 2 4 4 1 4 4 4 1 1 1 1 4 1 1 2
[186] 1 1 1 1 2 4 4 1 2 1 4 4 2 4 4 2 4 4 4 1 2 4 4 4 4 4 2 1 2 1 1 4 1 2 1 4 1
[223] 1 1 4 1 1 1 1 4 4 2 2 2 4

Within cluster sum of squares by cluster:
[1] 165.78366  83.94048  72.24661 104.71593
 (between_SS / total_SS =  47.1 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
[6] "betweenss"    "size"         "iter"         "ifault"      
Show the code
# Elbow (scree) plot: total within-cluster SS for k = 1..n_clusters.
n_clusters <- 10

# Preallocated accumulator for the total within-cluster sums of squares.
wss <- numeric(n_clusters)

set.seed(123)

# Fit k-means for each candidate k and record its total within-cluster SS.
for (k in seq_len(n_clusters)) {
  km.out <- kmeans(calc_cols, centers = k, nstart = 20)
  wss[k] <- km.out$tot.withinss
}

# Assemble the scree data and plot it.
wss_df <- tibble(clusters = 1:n_clusters, wss = wss)

scree_plot <- ggplot(wss_df, aes(x = clusters, y = wss, group = 1)) +
  geom_point(size = 4) +
  geom_line() +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10)) +
  xlab('Number of clusters')
scree_plot

Show the code
# Refit with 5 clusters and plot cluster membership in the
# (pre, change) plane.
# NOTE(review): no set.seed() immediately before this kmeans call, so the
# assignment depends on the RNG state left by the scree loop — confirm
# this is intentional.
km.out <- kmeans(calc_cols, centers = 5, nstart = 20)

calc_cols$cluster_id <- factor(km.out$cluster)

ggplot(calc_cols,
       aes(x = cognitive.competence.pre,
           y = cognitive.competence.change,
           color = cluster_id)) +
  geom_point(alpha = 0.5)

Compare with only binary SBI

Show the code
unique(calc_req$textbook.classification)
[1] "NotSBI2"  "NotSBI"   "OtherSBI" "ISI"      "Other"   
Show the code
# Collapse the five textbook classifications into SBI / Not SBI / Other.
# %in% replaces the repeated equality branches; unmatched values
# (including NA) still map to NA, exactly as before.
calc_req <- calc_req|>
  mutate(textbookSBI = case_when(textbook.classification %in% c("ISI", "OtherSBI") ~ "SBI",
                                 textbook.classification %in% c("NotSBI", "NotSBI2") ~ "Not SBI",
                                 textbook.classification == "Other" ~ "Other"))
calc_req |>
  ggplot(aes(x= cognitive.competence.pre, y = cognitive.competence.change,  color=gender, shape = textbookSBI)) +
  geom_point(alpha=0.5)

Individual graphs

Show the code
# Distribution of cognitive competence change by student gender.
ggplot(calc_req, aes(x = gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "olivedrab")

Show the code
# Same distribution, split by instructor gender instead of student gender.
ggplot(calc_req, aes(x = instructor.gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "olivedrab")

Show the code
# Raw years of experience vs. change shows no linear trend, which
# motivates the binned version plotted next.
calc_req |>
  ggplot(aes(x= years.teaching.experience, y = cognitive.competence.change)) + 
  geom_point() # not linear

Show the code
# Change by binned teaching experience.
ggplot(calc_req, aes(x = years.teaching.experience.binned, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "olivedrab")

Show the code
# Per-institution mean change, plotted per institution.
# NOTE(review): mean_change is constant within each institution, so each
# "boxplot" collapses to a single horizontal line — a point or bar chart
# of the summarized means may be the intended display; confirm.
calc_req|>
  group_by(institution) |>
  mutate(mean_change = mean(cognitive.competence.change)) |>
  ungroup()|>
  ggplot(aes(x= institution, y = mean_change)) + 
  geom_boxplot()

Cog Comp Change boxplots

Across calc pre-req classes

Show the code
# Change by gender across calc-prerequisite classes.
ggplot(calc_req, aes(x = gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "orchid")

Show the code
count(calc_req) # 513
# A tibble: 1 × 1
      n
  <int>
1   423
Show the code
calc_req |> 
  filter(is.na(gender))|> # gender NAs; current output shows 0 (an earlier run showed 2)
  count()
# A tibble: 1 × 1
      n
  <int>
1     0

Across non-calc pre-req classes

Show the code
# Subset to classes without a calculus prerequisite, then compare change
# by gender as before.
non_calc <- filter(allYrsFinal, math.prereq != "Calculus")

ggplot(non_calc, aes(x = gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "orchid")

Show the code
count(non_calc) # 13704 
# A tibble: 1 × 1
      n
  <int>
1 11115
Show the code
non_calc|>
  filter(is.na(gender))|> # gender NAs; current output shows 62 (an earlier run showed 115)
  count()
# A tibble: 1 × 1
      n
  <int>
1    62

Across 4-year schools - are there any outstanding ones?

Show the code
# allYrsFinal|>
#   select(institution)

#unique(allYrsFinal$institution)

# Rank baccalaureate colleges by mean cognitive competence change,
# ascending, so the worst institutions come first.
allYrsFinal |>
  filter(carnegie.classification == "Baccalaureate College") |>
  group_by(factor(institution))|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
  arrange(mean_cog_comp_change)
# A tibble: 27 × 2
   `factor(institution)` mean_cog_comp_change
   <fct>                                <dbl>
 1 111                                -1.22  
 2 124                                -0.396 
 3 8                                  -0.281 
 4 119                                -0.269 
 5 98                                 -0.236 
 6 122                                -0.183 
 7 108                                -0.176 
 8 15                                 -0.124 
 9 71                                 -0.0816
10 99                                 -0.0794
# ℹ 17 more rows
Show the code
# worst: 40 : -3.2
# best: 28 : 0.875

Find worst instructor at worst school for cog comp change (previously identified as Instructor50; note the current output for institution 40 lists only Instructor195, so the Instructor50 results below come back empty — likely stale)

Show the code
# Instructors at institution 40, worst mean change first.
# NOTE(review): the current output lists only Instructor195 here, which
# conflicts with the Instructor50 narrative around this chunk — the
# Instructor50 results appear stale; confirm against the current data.
allYrsFinal|>
  filter(institution == 40)|>
  group_by(factor(instructor))|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
  arrange(mean_cog_comp_change)
# A tibble: 1 × 2
  `factor(instructor)` mean_cog_comp_change
  <fct>                               <dbl>
1 Instructor195                       0.115

Find worst section (both were bad)

Show the code
# Mean change per section for Instructor50.
# NOTE(review): this filter returns zero rows in the current data, so
# every Instructor50 result below is empty — likely left over from an
# earlier data version; confirm.
allYrsFinal|>
  filter(instructor == "Instructor50")|>
  group_by(factor(section))|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
  arrange(mean_cog_comp_change)
# A tibble: 0 × 2
# ℹ 2 variables: factor(section) <fct>, mean_cog_comp_change <dbl>
Show the code
# Subset to Instructor50's rows (empty in the current data).
instr_50 <- allYrsFinal|>
  filter(instructor == "Instructor50")

instr_50["textbook.classification"] # NotSBI in an earlier run; zero rows now
# A tibble: 0 × 1
# ℹ 1 variable: textbook.classification <chr>
Show the code
# Gender boxplots for Instructor50's students (empty in the current data).
ggplot(instr_50, aes(x = gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "orchid")

Show the code
# Inspect achievement gain for Instructor50 (zero rows in current data).
instr_50 |>
  dplyr::select(ach.gain.24) # all NAs ?
# A tibble: 0 × 1
# ℹ 1 variable: ach.gain.24 <dbl>
Show the code
instr_50 |>
  group_by(gender)|> # 38:17 gender split in an earlier run; zero rows in current output
  count()
# A tibble: 0 × 2
# Groups:   gender [0]
# ℹ 2 variables: gender <chr>, n <int>
Show the code
instr_50["instructor.gender"] # female instructor
# A tibble: 0 × 1
# ℹ 1 variable: instructor.gender <chr>

Best instructor at best school for cog comp change (only 1 instructor: Instructor33)

Show the code
# Instructors at institution 28 (the best school for mean change);
# only Instructor33 appears.
allYrsFinal |>
  filter(institution == 28)|>
  group_by(factor(instructor))|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change))|>
  arrange(mean_cog_comp_change)
# A tibble: 1 × 2
  `factor(instructor)` mean_cog_comp_change
  <fct>                               <dbl>
1 Instructor33                        0.644
Show the code
# Section-level means for Instructor33; both sections show similar,
# positive change (0.545 and 0.747).
allYrsFinal |>
  filter(instructor == "Instructor33")|>
  group_by(factor(section))|>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change))|> # sections pretty similar
  arrange(mean_cog_comp_change)
# A tibble: 2 × 2
  `factor(section)` mean_cog_comp_change
  <fct>                            <dbl>
1 1                                0.545
2 2                                0.747
Show the code
# Subset to Instructor33's rows (51 students in the current data).
instr_33 <- allYrsFinal |>
  filter(instructor == "Instructor33")

instr_33["textbook.classification"] # NotSBI
# A tibble: 51 × 1
   textbook.classification
   <chr>                  
 1 NotSBI                 
 2 NotSBI                 
 3 NotSBI                 
 4 NotSBI                 
 5 NotSBI                 
 6 NotSBI                 
 7 NotSBI                 
 8 NotSBI                 
 9 NotSBI                 
10 NotSBI                 
# ℹ 41 more rows
Show the code
# Gender boxplots for Instructor33's students.
ggplot(instr_33, aes(x = gender, y = cognitive.competence.change)) +
  geom_boxplot() +
  geom_violin(alpha = 0.5, color = "orchid")

Show the code
# Mean achievement gain for Instructor33, NAs dropped first.
instr_33 |>
  drop_na(ach.gain.24)|>
  summarize(mean(ach.gain.24)) # 0.1689703  
# A tibble: 1 × 1
  `mean(ach.gain.24)`
                <dbl>
1               0.169
Show the code
instr_33 |>
  group_by(gender)|> # current output shows 35 female : 16 male (an earlier run showed 39:21)
  count()
# A tibble: 2 × 2
# Groups:   gender [2]
  gender     n
  <chr>  <int>
1 Female    35
2 Male      16
Show the code
instr_33["instructor.gender"] # female instructor
# A tibble: 51 × 1
   instructor.gender
   <chr>            
 1 Female           
 2 Female           
 3 Female           
 4 Female           
 5 Female           
 6 Female           
 7 Female           
 8 Female           
 9 Female           
10 Female           
# ℹ 41 more rows

Find top 10 sections in Year 3

Show the code
# Top 10 sections in Year 3 (16-17) by mean cognitive competence change.
# BUG FIX: the old code called slice_max() while still grouped by
# instructor.section; mean_cog_comp_change is constant within each group,
# so with_ties kept every row of every section and "top10" contained all
# 4,411 Year-3 rows (identical to low10's printed output). Rank sections
# first, then keep only rows belonging to the 10 best sections, carrying
# the section mean along via the join.
top_sections <- allYrsFinal |>
  filter(year == "16-17") |>
  group_by(instructor.section) |>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change), .groups = "drop") |>
  slice_max(order_by = mean_cog_comp_change, n = 10)

top10 <- allYrsFinal |>
  filter(year == "16-17") |>
  inner_join(top_sections, by = "instructor.section")

top10
# A tibble: 4,411 × 298
   instructor  section instructor.section opt.out.pre opt.out.post test.time.pre
   <chr>       <chr>   <chr>                    <dbl>        <dbl>         <dbl>
 1 Instructor… 1       Instructor100_Sec…           1            1            35
 2 Instructor… 1       Instructor100_Sec…           1            1          1469
 3 Instructor… 1       Instructor100_Sec…           1            1            29
 4 Instructor… 1       Instructor100_Sec…           1            1            51
 5 Instructor… 1       Instructor100_Sec…           1            1            27
 6 Instructor… 1       Instructor100_Sec…           1            1            37
 7 Instructor… 1       Instructor100_Sec…           1            1            35
 8 Instructor… 1       Instructor100_Sec…           1            1            34
 9 Instructor… 1       Instructor100_Sec…           1            1            25
10 Instructor… 1       Instructor100_Sec…           1            1            39
# ℹ 4,401 more rows
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
#   q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
#   q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
#   q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
#   q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
#   q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>, …

Long format

Show the code
# Names of the six cognitive-competence items, pre and post versions.
competenceFull <- c("q6e.pre.a", "q6e.post.a", "q7a.pre.a", "q7a.post.a", "q8f.pre.a", "q8f.post.a", "q9a.pre.a", "q9a.post.a", "q9b.pre.a", "q9b.post.a", "q9e.pre.a", "q9e.post.a")

# Reshape the top-10-section data to one row per student-question score.
questions_long_top10 <- top10 |>
  select(all_of(competenceFull), gender, ach.gain.24) |>
  pivot_longer(cols=all_of(competenceFull),
               names_to = "question",
               values_to = "score")

# Tag each row as pre or post and extract the question id (e.g. "q6e").
questions_long_top10 <- questions_long_top10 |>
  mutate(row_id = row_number(),
         preOrPost = case_when(str_detect(question, "pre") ~ "pre",
                               str_detect(question, "post") ~ "post"),
         question_num = str_extract(question, "q\\d+[a-z]"))


# Widen back out so each row carries both a pre and a post score.
# NOTE(review): each row_id holds only one of pre/post, so after pivoting
# every row has one NA; fill(.direction = "downup") pairs it with the
# adjacent row, which works because pivot_longer emits a student's pre
# and post consecutively. If a student's score is NA, the fill can pull a
# neighboring student's value within the same gender/question group —
# confirm this is acceptable.
questions_wide_top10 <- questions_long_top10 |>
  pivot_wider(names_from = preOrPost, values_from = score, id_cols = c(row_id, gender, question_num)) |>
  select(-row_id) |>
  group_by(gender, question_num) |>
  fill(pre, post, .direction = "downup") |>
  ungroup()

# Per-row change score (post minus pre).
questions_change_top10 <- questions_wide_top10 |>
  mutate(change = post - pre) 

Change Per Question by Gender Boxplots (top 10)

Show the code
# Per-question change distributions by gender for the top-10 sections.
ggplot(questions_change_top10,
       aes(x = question_num, y = change, color = gender)) +
  geom_boxplot() +
  labs(title="Top 10 sections: Change Per Question by Gender Boxplots")

Change Per Question by Gender Line Graph

Show the code
# Mean change per question by gender, drawn as connected lines so the
# gender gap per item is easy to compare.
questions_change_top10 |>
  mutate(question_num = factor(question_num, levels = unique(question_num))) |>
  group_by(gender, question_num) |>
  summarize(mean_change = mean(change)) |>
  ungroup() |>
  ggplot(aes(x = question_num, y = mean_change, group = gender, color = gender)) +
  geom_line()

Find lowest 10 sections

Show the code
# Lowest 10 sections in Year 3 (16-17) by mean cognitive competence change.
# BUG FIX: the old code called slice_min() while still grouped by
# instructor.section; the ordering variable is constant within each group,
# so with_ties kept every row and "low10" contained all 4,411 Year-3 rows
# (its printed output was identical to top10's). Rank sections first,
# then keep only rows belonging to the 10 worst sections.
low_sections <- allYrsFinal |>
  filter(year == "16-17") |>
  group_by(instructor.section) |>
  summarize(mean_cog_comp_change = mean(cognitive.competence.change), .groups = "drop") |>
  slice_min(order_by = mean_cog_comp_change, n = 10)

low10 <- allYrsFinal |>
  filter(year == "16-17") |>
  inner_join(low_sections, by = "instructor.section")

low10
# A tibble: 4,411 × 298
   instructor  section instructor.section opt.out.pre opt.out.post test.time.pre
   <chr>       <chr>   <chr>                    <dbl>        <dbl>         <dbl>
 1 Instructor… 1       Instructor100_Sec…           1            1            35
 2 Instructor… 1       Instructor100_Sec…           1            1          1469
 3 Instructor… 1       Instructor100_Sec…           1            1            29
 4 Instructor… 1       Instructor100_Sec…           1            1            51
 5 Instructor… 1       Instructor100_Sec…           1            1            27
 6 Instructor… 1       Instructor100_Sec…           1            1            37
 7 Instructor… 1       Instructor100_Sec…           1            1            35
 8 Instructor… 1       Instructor100_Sec…           1            1            34
 9 Instructor… 1       Instructor100_Sec…           1            1            25
10 Instructor… 1       Instructor100_Sec…           1            1            39
# ℹ 4,401 more rows
# ℹ 292 more variables: test.time.post <dbl>, q16a.pre.c.1415 <chr>,
#   q16b.pre.c.1415 <chr>, q16c.pre.c.1415 <chr>, q16a.pre.c <chr>,
#   q16b.pre.c <chr>, q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>,
#   q17.pre.c <chr>, q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>,
#   q21.pre.c <chr>, q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>,
#   q25.pre.c <chr>, q26.pre.c <chr>, q27.pre.c <chr>, q28.pre.c <chr>, …

Long format

Show the code
# Reshape the lowest-10-section data to one row per student-question score.
questions_long_low10 <- low10 |>
  select(all_of(competenceFull), gender, ach.gain.24) |>
  pivot_longer(cols=all_of(competenceFull),
               names_to = "question",
               values_to = "score")

# Tag each row as pre or post and extract the question id (e.g. "q6e").
questions_long_low10 <- questions_long_low10 |>
  mutate(row_id = row_number(),
         preOrPost = case_when(str_detect(question, "pre") ~ "pre",
                               str_detect(question, "post") ~ "post"),
         question_num = str_extract(question, "q\\d+[a-z]"))


# Widen back out so each row carries both a pre and a post score.
# NOTE(review): each row_id holds only one of pre/post, so after pivoting
# every row has one NA; fill(.direction = "downup") pairs it with the
# adjacent row, relying on pivot_longer emitting a student's pre and post
# consecutively. If a student's score is NA, the fill can pull a
# neighboring student's value within the same gender/question group —
# confirm this is acceptable.
questions_wide_low10 <- questions_long_low10 |>
  pivot_wider(names_from = preOrPost, values_from = score, id_cols = c(row_id, gender, question_num)) |>
  select(-row_id) |>
  group_by(gender, question_num) |>
  fill(pre, post, .direction = "downup") |>
  ungroup()

# Per-row change score (post minus pre).
questions_change_low10 <- questions_wide_low10 |>
  mutate(change = post - pre) 

Change Per Question by Gender Boxplots (lowest 10)

Show the code
# BUG FIX: this chunk is headed "Lowest 10 sections" but plotted
# questions_change_top10 (copy-paste error); plot the low-10 data frame.
questions_change_low10|>
  ggplot(aes(x=question_num, y=change, color = gender)) + 
  geom_boxplot() +
  labs(title="Lowest 10 sections: Change Per Question by Gender Boxplots")

Change Per Question by Gender Line Graph (lowest 10)

Show the code
# BUG FIX: same copy-paste error as the boxplot chunk above this heading —
# the "lowest 10" line graph summarized questions_change_top10; use the
# low-10 data frame.
questions_change_low10|>
  mutate(question_num = factor(question_num, levels = unique(question_num))) |>
  group_by(gender, question_num) |>
  summarize(mean_change = mean(change)) |>
  ungroup() |>
  ggplot(aes(x=question_num, y=mean_change, color = gender, group = gender)) +
    geom_line() 

ISCAM instructors

Show the code
# Any instructors using the ISCAM textbook? None in the current data —
# "ISCAM" is not among the textbook.classification levels printed earlier.
allYrsFinal|>
  filter(textbook.classification == "ISCAM")
# A tibble: 0 × 297
# ℹ 297 variables: instructor <chr>, section <chr>, instructor.section <chr>,
#   opt.out.pre <dbl>, opt.out.post <dbl>, test.time.pre <dbl>,
#   test.time.post <dbl>, q16a.pre.c.1415 <chr>, q16b.pre.c.1415 <chr>,
#   q16c.pre.c.1415 <chr>, q16a.pre.c <chr>, q16b.pre.c <chr>,
#   q16c.pre.c <chr>, q16d.pre.c <chr>, q16.pre.c <dbl>, q17.pre.c <chr>,
#   q18.pre.c <chr>, q19.pre.c <chr>, q20.pre.c <chr>, q21.pre.c <chr>,
#   q22.pre.c <chr>, q23.pre.c <chr>, q24.pre.c <chr>, q25.pre.c <chr>, …